library(kableExtra)
suppressMessages(library(plotly))
library(dplyr)
library(DT)
library(stringr)
suppressMessages(library(ggplot2))
suppressMessages(library(dplyr))
library(tidytext)
library(tidyr)
library(xlsx)
library(reshape2)
#library(usmap)
#library(maps)
For this project, I will:
1. Map out the number of amputations among diabetics by state.
2. Determine which state has the highest number of amputations and which gender accounts for most of them.
3. Determine whether, across the U.S., amputations are predominantly higher for a specific gender.
Initially, I wanted to analyze diabetic amputations alongside socioeconomic factors. However, I was unable to obtain a large portion of the data due to HIPAA laws. I then turned to a more basic question: what is the prevalence of diabetes in the U.S.? In doing so, I found some alarming information regarding occurrences of diabetic amputations by gender.
# Read the hospitalization/complication records straight from GitHub.
# NOTE(review): needs network access at run time.
diabetes <- read.csv(
  "https://raw.githubusercontent.com/nilsabermudez/607/master/complications_hospitalizations_99.csv",
  header = TRUE,
  sep = ","
)

# Columns are picked by position; later code refers to them by the names
# state, year, sex, age and lea_count.
CaData <- diabetes[c(2, 3, 4, 6, 26)]

# Keep rows with age code 0, then of those the rows with sex code 0.
# which() discards rows where the comparison is NA, exactly as subset() does.
# NOTE(review): presumably code 0 means "all ages" / "both sexes" -- confirm
# against the data dictionary.
CaData2 <- CaData[which(CaData$age == 0), ]
CaData3 <- CaData2[which(CaData2$sex == 0), ]

# Remove every row that still contains a missing value.
Completerecords <- na.omit(CaData3)
# Line chart of amputation counts (lea_count) per state, one line per year,
# drawn from the complete-case rows in Completerecords.
p <- ggplot2::ggplot(
  data = Completerecords,
  ggplot2::aes(x = state, y = lea_count, color = year, group = year)
) +
  ggplot2::geom_line(show.legend = TRUE) +
  # ggplotly() has no `colors` argument (it was swallowed by `...` and
  # ignored), so the YlOrRd palette is set on the ggplot colour scale instead.
  # NOTE(review): assumes `year` is numeric (continuous scale) -- confirm.
  ggplot2::scale_colour_distiller(palette = "YlOrRd") +
  ggplot2::labs(x = "States", y = "Number of Amputations") +
  ggplot2::ggtitle("Amputations across the U.S. due to Diabetes") +
  ggplot2::theme(
    legend.position = "top",
    # State names are long, so rotate the x-axis labels to vertical.
    axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.35, hjust = .5)
  )

# Convert to an interactive plotly widget.
plotly::ggplotly(p)
# Texas rows of CaData, restricted to the five analysis columns.
Texas <- subset(
  CaData,
  state == "TEXAS",
  select = c("state", "year", "sex", "age", "lea_count")
)

# Snapshot of the Texas rows before the sex/age filters below are applied
# (not used by the plot in this section).
TexasMF <- Texas

# Keep sex codes 1 and 2 (labelled Males/Females on the axis) at age code 0.
Texas <- Texas[Texas$sex %in% c(1, 2), ]
Texas <- subset(
  Texas,
  age == 0,
  select = c("state", "year", "sex", "age", "lea_count")
)

# The chart is titled "2014", but Texas holds every year in the data and
# stat = "identity" stacks all of them into each bar. Plot the 2014 rows
# only so the bar heights match the title. Texas itself is left unchanged.
Texas2014 <- Texas[Texas$year %in% 2014, ]

# Bar chart of 2014 Texas amputation counts by gender.
p <- ggplot(data = Texas2014, aes(x = factor(sex), y = lea_count)) +
  geom_bar(stat = "identity", width = 0.7, fill = "steelblue") +
  ggplot2::labs(x = "Amputations by Gender", y = "Number of Amputations") +
  ggtitle("2014 - State of Texas Amputations by Gender") +
  scale_x_discrete(labels = c("1" = "Males", "2" = "Females"))
p
# All states at age code 0, then the 2014 rows for sex codes 1 and 2.
AllData <- subset(
  CaData,
  age == 0,
  select = c("state", "year", "sex", "age", "lea_count")
)
USAmputations <- subset(
  AllData,
  year == 2014,
  select = c("state", "year", "sex", "age", "lea_count")
)
USAmputations <- USAmputations[USAmputations$sex %in% c(1, 2), ]

# Labelled gender factor for plotting. Mapping the numeric `sex` column
# directly gives a continuous fill, and with group = state the two genders of
# a state share one dodge group, so their bars were never drawn side by side.
USAmputations$gender <- factor(
  USAmputations$sex,
  levels = c(1, 2),
  labels = c("Males", "Females")
)

# Side-by-side bars per state, one bar per gender. The Males/Females labels
# now live on the fill legend; previously they were attached to the state
# x-axis, where they matched no break.
p <- ggplot2::ggplot(
  data = USAmputations,
  ggplot2::aes(x = state, y = lea_count, fill = gender)
) +
  ggplot2::geom_bar(stat = "identity", position = "dodge", width = .5) +
  ggplot2::labs(x = "States", y = "Number of Amputations", fill = "Gender") +
  ggplot2::ggtitle("Amputations across the U.S. due to Diabetes by Gender - 2014") +
  ggplot2::theme(
    legend.position = "top",
    axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.35, hjust = .5),
    panel.grid.major.x = ggplot2::element_blank()
  )

# Pass the plot explicitly rather than relying on ggplot2::last_plot().
plotly::ggplotly(p, tooltip = c("text", "x", "fill", "y"))
# Read the est14us.csv poverty file from GitHub; the first five lines are
# skipped so that the following line is used as the header row.
# NOTE(review): needs network access at run time.
CensusPoverty <- read.csv(
  "https://raw.githubusercontent.com/nilsabermudez/607/master/est14us.csv",
  skip = 5,
  header = TRUE,
  sep = ","
)

# Keep only the postal code and the all-ages poverty estimate.
CensusPoverty <- CensusPoverty[c("Postal.Code", "Poverty.Estimate..All.Ages")]

# Interactive table showing five rows per page.
datatable(CensusPoverty, options = list(pageLength = 5))
In this project:
This analysis could be presented to the diabetic community as a teaching example of the dangerous complications of diabetes.
References:
https://www.census.gov/data/datasets/2014/demo/saipe/2014-state-and-county.html
https://nccd.cdc.gov/CKD/default.aspx